Lees data in en beoordeel kwaliteit
# Load the HR export. read.csv2() reads ';'-separated fields with ',' as the
# decimal mark. stringsAsFactors is set explicitly because its default changed
# from TRUE to FALSE in R 4.0.0, and the per-level counts that summary() prints
# for the text columns depend on those columns being factors.
myDF <- read.csv2("hra-data.csv", stringsAsFactors = TRUE)

# Quick quality check: first rows and a per-column summary.
head(myDF)
summary(myDF)
Peildatum Stam.nummer Stromen Naam Man.Vrouw
31-01-2014:1744 Min. : 1.0 :4081 a : 698 Man :4023
31-01-2015:1934 1st Qu.: 319.5 - : 4 o : 400 Vrouw:4360
31-01-2016:2444 Median : 591.0 Doorstroom IN :2123 e : 396
31-01-2017:2261 Mean : 590.2 Doorstroom UIT:1705 h : 396
3rd Qu.: 852.0 Instroom : 305 n : 381
Max. :2465.0 Uitstroom : 165 k : 377
(Other):5735
Geboortedatum Leeftijd Dienst.jaren Organisatie Organisatie.niveau.1
04-04-1986: 19 Min. :20.20 Min. : 0.100 Banking:8383 Banking and Payment:6272
10-07-1954: 19 1st Qu.:37.85 1st Qu.: 2.700 Corporate Services :2111
02-09-1963: 18 Median :47.50 Median : 6.200
03-05-1965: 18 Mean :46.27 Mean : 8.964
30-03-1976: 18 3rd Qu.:54.50 3rd Qu.:12.500
15-04-1953: 17 Max. :71.20 Max. :35.900
(Other) :8274
Organisatie.niveau.2 Organisatie.niveau.3
Business Support Services:1574 Business Support Services:1573
Private Banking :1307 Domestic Markets :1048
Domestic Markets :1278 Large Accounts : 848
Investments :1213 National Investments : 657
Securities : 692 Agriculture Credit : 560
Auditing : 595 Bank and Trusts : 464
(Other) :1724 (Other) :3233
Functie Garantieschaal Functieschaal FTE Type.Contract
Business Banker :3197 Min. : 2.00 Min. : 1.00 Min. :0.0250 Tijdelijk:1358
Consumer Banker :1700 1st Qu.: 9.00 1st Qu.: 9.00 1st Qu.:0.6000 Vast :7025
Administration officer: 556 Median :11.00 Median :11.00 Median :0.8000
Investment officer : 351 Mean :10.42 Mean :10.24 Mean :0.7624
Investment assistant : 240 3rd Qu.:12.00 3rd Qu.:12.00 3rd Qu.:1.0000
Business Broker : 194 Max. :18.00 Max. :17.00 Max. :1.0000
(Other) :2145
Type.Arbeidsovereenkomst Hoogstgenoten.opleiding Aantal.ziekmeldingen Dagen.afwezig
D2:7027 :1674 - :3895 - :3575
D3: 206 Associate : 981 1 :2195 3,00 : 218
D4:1150 Bachelor :2660 2 :1284 1,00 : 203
Master :2875 3 : 592 2,00 : 198
Ph.D/Doctoral: 193 4 : 267 4,00 : 180
5 : 93 6,00 : 158
(Other): 57 (Other):3851
Leidinggevende Naam.leidingegevende Einddatum.Bepaalde.Tijd
- :2631 - :2639 31-12-2099:6949
780 : 286 m : 995 14-03-2015: 184
781 : 247 l : 570 13-03-2016: 134
184 : 246 j : 460 14-04-2015: 103
608 : 240 r : 430 13-03-2017: 92
655 : 216 o : 376 13-04-2016: 68
(Other):4517 (Other):2913 (Other) : 853
Verander data types, filter en voeg kolommen toe
# Convert column types in one pass:
#  - the three date columns are parsed from dd-mm-yyyy text;
#  - spaces are stripped from Stromen (the result is a character column);
#  - for the two absence columns every "-" is removed, the decimal comma is
#    replaced by a point and the result is parsed as numeric, so an entry
#    that consisted only of "-" ends up as NA.
myDF <- myDF %>%
  mutate(
    Peildatum = as.Date(Peildatum, format = "%d-%m-%Y"),
    Geboortedatum = as.Date(Geboortedatum, format = "%d-%m-%Y"),
    Einddatum.Bepaalde.Tijd = as.Date(Einddatum.Bepaalde.Tijd, format = "%d-%m-%Y"),
    Stromen = gsub(" ", "", Stromen),
    Aantal.ziekmeldingen = as.numeric(gsub(",", ".", gsub("-", "", Aantal.ziekmeldingen))),
    Dagen.afwezig = as.numeric(gsub(",", ".", gsub("-", "", Dagen.afwezig)))
  )

# Some people left the organisation earlier: keep only rows whose contract
# end date lies after start.date.
start.date <- as.Date("2016-01-31")

# Restrict to the 2017-01-31 reference date, contracts ending after
# start.date and rows with an empty Stromen value, then add the FTE-weighted
# columns: FTE * 365 days and FTE * days absent.
myDF <- myDF %>%
  filter(
    Peildatum == "2017-01-31",
    Einddatum.Bepaalde.Tijd > start.date,
    Stromen == ""
  ) %>%
  mutate(
    FTE.x.werkdagen = FTE * 365,
    FTE.x.ziektedagen = FTE * Dagen.afwezig
  )
summary(myDF)
Peildatum Stam.nummer Stromen Naam Man.Vrouw
Min. :2017-01-31 Min. : 1.0 Length:1064 a : 93 Man :498
1st Qu.:2017-01-31 1st Qu.: 342.8 Class :character h : 52 Vrouw:566
Median :2017-01-31 Median : 628.5 Mode :character o : 52
Mean :2017-01-31 Mean : 627.1 e : 50
3rd Qu.:2017-01-31 3rd Qu.: 927.2 d : 49
Max. :2017-01-31 Max. :1230.0 q : 49
(Other):719
Geboortedatum Leeftijd Dienst.jaren Organisatie
Min. :1945-11-01 Min. :24.20 Min. : 1.100 Banking:1064
1st Qu.:1960-10-26 1st Qu.:40.80 1st Qu.: 2.800
Median :1966-12-06 Median :50.15 Median : 6.400
Mean :1968-07-20 Mean :48.53 Mean : 9.247
3rd Qu.:1976-03-30 3rd Qu.:56.30 3rd Qu.:12.700
Max. :1992-12-06 Max. :71.20 Max. :35.900
Organisatie.niveau.1 Organisatie.niveau.2
Banking and Payment:811 Business Support Services:287
Corporate Services :253 Investments :164
Private Banking :151
Domestic Markets :113
Securities : 82
Auditing : 67
(Other) :200
Organisatie.niveau.3 Functie Garantieschaal
Business Support Services:287 Business Banker :362 Min. : 2.00
Large Accounts :100 Consumer Banker :239 1st Qu.: 9.00
International Investments: 91 Administration officer: 84 Median :11.00
Domestic Markets : 89 Investment officer : 42 Mean :10.45
Corporate Investments : 64 Business Broker : 36 3rd Qu.:12.00
Bank and Trusts : 63 Investment assistant : 35 Max. :18.00
(Other) :370 (Other) :266
Functieschaal FTE Type.Contract Type.Arbeidsovereenkomst
Min. : 1.00 Min. :0.0250 Tijdelijk:160 D2:904
1st Qu.: 9.00 1st Qu.:0.6000 Vast :904 D3: 23
Median :11.00 Median :0.8000 D4:137
Mean :10.25 Mean :0.7629
3rd Qu.:12.00 3rd Qu.:1.0000
Max. :17.00 Max. :1.0000
Hoogstgenoten.opleiding Aantal.ziekmeldingen Dagen.afwezig Leidinggevende
:222 Min. :1.000 Min. : 0.00 - :325
Associate :126 1st Qu.:1.000 1st Qu.: 2.40 1175 : 37
Bachelor :328 Median :1.000 Median : 5.60 184 : 33
Master :362 Mean :1.842 Mean : 20.98 1179 : 30
Ph.D/Doctoral: 26 3rd Qu.:2.000 3rd Qu.: 14.00 1178 : 28
Max. :7.000 Max. :380.52 780 : 27
NA's :501 NA's :461 (Other):584
Naam.leidingegevende Einddatum.Bepaalde.Tijd FTE.x.werkdagen FTE.x.ziektedagen
- :326 Min. :2016-12-12 Min. : 9.125 Min. : 0.00
m :131 1st Qu.:2099-12-31 1st Qu.:219.000 1st Qu.: 1.92
l : 76 Median :2099-12-31 Median :292.000 Median : 4.48
a : 65 Mean :2086-11-23 Mean :278.476 Mean : 17.09
r : 54 3rd Qu.:2099-12-31 3rd Qu.:365.000 3rd Qu.: 12.00
j : 52 Max. :2099-12-31 Max. :365.000 Max. :380.52
(Other):360 NA's :461
Bereken ziekteverzuim
# Sickness absence rate: the sum of FTE-weighted days absent divided by the
# sum of FTE-weighted available days (FTE * 365).
# Rows with a missing number of days absent are dropped from the numerator
# (na.rm = TRUE) but still contribute to the denominator.
tot.aant.ziektedagen <- sum(myDF$FTE.x.ziektedagen, na.rm = TRUE)
tot.aant.werkdagen <- sum(myDF$FTE.x.werkdagen)
ziekteverzuim <- tot.aant.ziektedagen / tot.aant.werkdagen

# Echo the numerator.
tot.aant.ziektedagen
[1] 10307.52
# Echo the denominator: the sum of the FTE.x.werkdagen column.
tot.aant.werkdagen
[1] 296298.6
# Echo the resulting ratio (a fraction, not a percentage).
ziekteverzuim
[1] 0.0347876
Visualiseer data
# Frequency polygon of age (bins of width 2), one line per gender,
# rendered as an interactive plotly widget 800 px wide.
p1 <- ggplot(data = myDF) +
  geom_freqpoly(
    mapping = aes(x = Leeftijd, colour = Man.Vrouw),
    binwidth = 2
  )
ggplotly(p1, width = 800)
# Box plot of age per gender, boxes filled by gender,
# rendered as an interactive plotly widget 800 px wide.
p2 <- ggplot(data = myDF) +
  geom_boxplot(
    mapping = aes(x = Man.Vrouw, y = Leeftijd, fill = Man.Vrouw)
  )
ggplotly(p2, width = 800)
# Scatter plot of days absent against age, one point per row, coloured by
# gender. The default point shape has no interior to fill, so the grouping
# must be mapped to `colour`; with `fill` all points were drawn alike.
p3 <- ggplot(myDF) +
  geom_point(aes(x = Leeftijd, y = Dagen.afwezig, colour = Man.Vrouw))
ggplotly(p3, width = 800)
# Days absent per top-level organisation unit, one point per row, coloured by
# contract type. The default point shape has no interior to fill, so the
# grouping must be mapped to `colour`; with `fill` all points were drawn alike.
p4 <- ggplot(myDF) +
  geom_point(aes(x = Organisatie.niveau.1, y = Dagen.afwezig, colour = Type.Contract))
ggplotly(p4, width = 800)
# Box plot of FTE per gender, boxes filled by gender,
# rendered as an interactive plotly widget 800 px wide.
# Note: this assignment reuses the name p4.
p4 <- ggplot(data = myDF) +
  geom_boxplot(
    mapping = aes(x = Man.Vrouw, y = FTE, fill = Man.Vrouw)
  )
ggplotly(p4, width = 800)
Multivariate analysis
# Group the data by gender.
grpMV <- myDF %>% group_by(Man.Vrouw)

# Per gender: row count and mean age.
grpMV %>%
  summarize(Aantal = n(), Gem.Leeftijd = mean(Leeftijd))

# Per gender: row count and mean days absent. Missing values are ignored in
# the mean, while Aantal counts every row in the group.
grpMV %>%
  summarize(Aantal = n(), Gem.dgn.afw = mean(Dagen.afwezig, na.rm = TRUE))
# Group the data by contract type.
grpTC <- myDF %>% group_by(Type.Contract)

# Per contract type: row count and mean age.
grpTC %>%
  summarize(Aantal = n(), Gem.Leeftijd = mean(Leeftijd))

# Per contract type: row count and mean days absent. Missing values are
# ignored in the mean, while Aantal counts every row in the group.
grpTC %>%
  summarize(Aantal = n(), Gem.dgn.afw = mean(Dagen.afwezig, na.rm = TRUE))
# One-way analysis of variance: days absent explained by contract type.
m1 <- aov(
  formula = Dagen.afwezig ~ Type.Contract,
  data = myDF
)
summary(m1)
Df Sum Sq Mean Sq F value Pr(>F)
Type.Contract 1 705 704.7 0.352 0.553
Residuals 601 1202574 2001.0
461 observations deleted due to missingness
# One-way analysis of variance: days absent explained by the top-level
# organisation unit.
m2 <- aov(
  formula = Dagen.afwezig ~ Organisatie.niveau.1,
  data = myDF
)
summary(m2)
Df Sum Sq Mean Sq F value Pr(>F)
Organisatie.niveau.1 1 261 261.2 0.13 0.718
Residuals 601 1203018 2001.7
461 observations deleted due to missingness
# One-way analysis of variance: FTE explained by gender.
m3 <- aov(
  formula = FTE ~ Man.Vrouw,
  data = myDF
)
summary(m3)
Df Sum Sq Mean Sq F value Pr(>F)
Man.Vrouw 1 5.61 5.608 105.5 <2e-16 ***
Residuals 1062 56.43 0.053
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
# Plot the Tukey HSD intervals for model m3 at a 99% family-wise confidence
# level, with horizontal axis labels (las = 1) and drawn in red.
plot(
  x = TukeyHSD(m3, conf.level = 0.99),
  las = 1,
  col = "red"
)

LS0tCnRpdGxlOiAiSFIgQW5hbHl0aWNzIgpvdXRwdXQ6IGh0bWxfbm90ZWJvb2sKLS0tCgojIyMgTGFhZCBwYWNrYWdlcwoKYGBge3IgbWVzc2FnZT1GQUxTRSwgd2FybmluZz1GQUxTRSwgaW5jbHVkZT1GQUxTRSwgcGFnZWQucHJpbnQ9RkFMU0V9CmxpYnJhcnkodGlkeXZlcnNlKQpsaWJyYXJ5KHBsb3RseSkKYGBgCgojIyMgTGVlcyBkYXRhIGluIGVuIGJlb29yZGVlbCBrd2FsaXRlaXQKCmBgYHtyfQpteURGIDwtIHJlYWQuY3N2MigiaHJhLWRhdGEuY3N2IikKaGVhZChteURGKQpzdW1tYXJ5KG15REYpCmBgYAoKIyMgVmVyYW5kZXIgZGF0YSB0eXBlcywgZmlsdGVyIGVuIHZvZWcga29sb21tZW4gdG9lCgpgYGB7cn0KbXlERiRQZWlsZGF0dW0gPC0gYXMuRGF0ZShteURGJFBlaWxkYXR1bSwgZm9ybWF0PSIlZC0lbS0lWSIpCm15REYkR2Vib29ydGVkYXR1bSA8LSBhcy5EYXRlKG15REYkR2Vib29ydGVkYXR1bSwgZm9ybWF0PSIlZC0lbS0lWSIpCm15REYkRWluZGRhdHVtLkJlcGFhbGRlLlRpamQgPC0gYXMuRGF0ZShteURGJEVpbmRkYXR1bS5CZXBhYWxkZS5UaWpkLCBmb3JtYXQ9IiVkLSVtLSVZIikKCm15REYkU3Ryb21lbiA8LSBnc3ViKCIgIiwgIiIsIG15REYkU3Ryb21lbikKbXlERiRBYW50YWwuemlla21lbGRpbmdlbiA8LSBnc3ViKCItIiwgIiIsIG15REYkQWFudGFsLnppZWttZWxkaW5nZW4pCm15REYkQWFudGFsLnppZWttZWxkaW5nZW4gPC0gYXMubnVtZXJpYyhnc3ViKCIsIiwgIi4iLCBteURGJEFhbnRhbC56aWVrbWVsZGluZ2VuKSkKbXlERiREYWdlbi5hZndlemlnIDwtIGdzdWIoIi0iLCAiIiwgbXlERiREYWdlbi5hZndlemlnKQpteURGJERhZ2VuLmFmd2V6aWcgPC0gYXMubnVtZXJpYyhnc3ViKCIsIiwgIi4iLCBteURGJERhZ2VuLmFmd2V6aWcpKQoKCiMgIyBTb21taWdlIHBlcnNvbmVuIGhlYmJlbiBkZSBvcmdhbmlzYXRpZSBlZXJkZXIgdmVybGF0ZW4Kc3RhcnQuZGF0ZSA8LSBhcy5EYXRlKCIyMDE2LTAxLTMxIikKbXlERiA8LSBteURGICU+JSBmaWx0ZXIoUGVpbGRhdHVtID09ICIyMDE3LTAxLTMxIiwgRWluZGRhdHVtLkJlcGFhbGRlLlRpamQgPiBzdGFydC5kYXRlLCBTdHJvbWVuID09ICIiKQoKbXlERiA8LSBtdXRhdGUobXlERiwgRlRFLngud2Vya2RhZ2VuPUZURSozNjUsIEZURS54LnppZWt0ZWRhZ2VuPUZURSpEYWdlbi5hZndlemlnKQpzdW1tYXJ5KG15REYpCmBgYAoKIyMjIEJlcmVrZW4gemlla3RldmVyenVpbQpgYGB7cn0KdG90LmFhbnQuemlla3RlZGFnZW4gPC0gc3VtKG15REYkRlRFLnguemlla3RlZGFnZW4sIG5hLnJtID0gVFJVRSkKdG90LmFhbnQud2Vya2RhZ2VuIDwtIHN1bShteURGJEZURS54LndlcmtkYWdlbikKemlla3RldmVyenVpbSA9IHRvdC5hYW50LnppZWt0ZWRhZ2VuIC8gdG90LmFhbnQud2Vya2RhZ2VuCgp0b3QuYWFudC56aWVrdGVkYWdlbgp0b3QuYWFudC53ZXJrZGFnZW4Kemlla3RldmVyenVpbQoKYGBgCgojIyMgVmlzdWFsaXNlZXIgZGF0YQoKYGBge3J9CgpwMSA8
LSBnZ3Bsb3QobXlERikgKwogIGdlb21fZnJlcXBvbHkoYWVzKHg9TGVlZnRpamQsIGNvbG9yID0gTWFuLlZyb3V3KSwgYmlud2lkdGggPSAyKQpnZ3Bsb3RseShwMSwgd2lkdGggPSA4MDApCmBgYAoKYGBge3J9CnAyIDwtIGdncGxvdChteURGKSArCiAgZ2VvbV9ib3hwbG90KGFlcyh4PU1hbi5Wcm91dywgeT1MZWVmdGlqZCwgZmlsbD1NYW4uVnJvdXcpKQpnZ3Bsb3RseShwMiwgd2lkdGggPSA4MDApCgpgYGAKCmBgYHtyfQpwMyA8LSBnZ3Bsb3QobXlERikgKwogIGdlb21fcG9pbnQoYWVzKHg9TGVlZnRpamQsIHk9RGFnZW4uYWZ3ZXppZywgZmlsbD1NYW4uVnJvdXcpKQpnZ3Bsb3RseShwMywgd2lkdGggPSA4MDApCmBgYAoKYGBge3J9CnA0IDwtIGdncGxvdChteURGKSArCiAgZ2VvbV9wb2ludChhZXMoeD1PcmdhbmlzYXRpZS5uaXZlYXUuMSAsIHk9RGFnZW4uYWZ3ZXppZywgZmlsbD1UeXBlLkNvbnRyYWN0KSkKZ2dwbG90bHkocDQsIHdpZHRoID0gODAwKQpgYGAKCmBgYHtyfQpwNCA8LSBnZ3Bsb3QobXlERikgKwogIGdlb21fYm94cGxvdChhZXMoeD1NYW4uVnJvdXcgLCB5PUZURSwgZmlsbD1NYW4uVnJvdXcpKQpnZ3Bsb3RseShwNCwgd2lkdGggPSA4MDApCmBgYAoKIyMjIE11bHRpdmFyaWF0ZSBhbmFseXNpcwoKYGBge3J9CmdycE1WIDwtIGdyb3VwX2J5KG15REYsIE1hbi5Wcm91dykKc3VtbWFyaXplKGdycE1WLCBBYW50YWwgPSBuKCksIEdlbS5MZWVmdGlqZCA9IG1lYW4oTGVlZnRpamQpKQpzdW1tYXJpemUoZ3JwTVYsIEFhbnRhbCA9IG4oKSwgR2VtLmRnbi5hZncgPSBtZWFuKERhZ2VuLmFmd2V6aWcsIG5hLnJtID0gVFJVRSkpCmBgYAoKYGBge3J9CmdycFRDIDwtIGdyb3VwX2J5KG15REYsIFR5cGUuQ29udHJhY3QpCnN1bW1hcml6ZShncnBUQywgQWFudGFsID0gbigpLCBHZW0uTGVlZnRpamQgPSBtZWFuKExlZWZ0aWpkKSkKc3VtbWFyaXplKGdycFRDLCBBYW50YWwgPSBuKCksIEdlbS5kZ24uYWZ3ID0gbWVhbihEYWdlbi5hZndlemlnLCBuYS5ybSA9IFRSVUUpKQpgYGAKCmBgYHtyfQptMTwtIGFvdihEYWdlbi5hZndlemlnIH4gVHlwZS5Db250cmFjdCwgZGF0YSA9IG15REYpCnN1bW1hcnkobTEpCgptMjwtIGFvdihEYWdlbi5hZndlemlnIH4gT3JnYW5pc2F0aWUubml2ZWF1LjEsIGRhdGEgPSBteURGKQpzdW1tYXJ5KG0yKQoKbTM8LSBhb3YoRlRFIH4gTWFuLlZyb3V3LCBkYXRhID0gbXlERikKc3VtbWFyeShtMykKCnBsb3QoVHVrZXlIU0QobTMsIGNvbmYubGV2ZWwgPSAwLjk5KSxsYXM9MSwgY29sID0gInJlZCIpCgpgYGAKCg==